*"Factset Revere Relationship Clean";

/*
We remove duplicate customer-supplier relationship and extraneous relationships whose start and end 
dates are encompassed within a longer relationship between the same pair of firms. In addition, we 
consolidate various relationships between the same pair of firms across different time periods into
a continuous relationship if the inverval between consecutive relationships does not exceed six months.
*/



libname lib '...';




%let var_=REL_TYPE SOURCE_COMPANY_ID TARGET_COMPANY_ID REL_TYPE START_ END_ SOURCE_NAME TARGET_NAME SOURCE_CUSIP TARGET_CUSIP SOURCE_ISIN TARGET_ISIN; 

/*option 1: updated Factset Dataset*/
data CS_1(keep=&var_);
set lib.FACTSET_REVERE_REL;
if REL_TYPE="CUSTOMER" or REL_TYPE="SUPPLIER";
run;


Data CS_CUSTOMER;
set CS_1;
where REL_TYPE="CUSTOMER";
run;


Data CS_SUPPLIER;
set CS_1;
where REL_TYPE="SUPPLIER";
run;


Data CS_CUSTOMER (rename=(SOURCE_COMPANY_ID=supplier_id TARGET_COMPANY_ID=customer_id SOURCE_ISIN=SUPPLIER_ISIN TARGET_ISIN=CUSTOMER_ISIN SOURCE_NAME=supplier TARGET_NAME=customer SOURCE_CUSIP=supplier_cusip TARGET_CUSIP=customer_cusip));
set CS_CUSTOMER;
run;


Data CS_SUPPLIER (rename=(SOURCE_COMPANY_ID=customer_id TARGET_COMPANY_ID=supplier_id SOURCE_ISIN=CUSTOMER_ISIN TARGET_ISIN=SUPPLIER_ISIN SOURCE_NAME=customer TARGET_NAME=supplier SOURCE_CUSIP=customer_cusip TARGET_CUSIP=supplier_cusip));
set CS_SUPPLIER(drop=REL_TYPE);
REL_TYPE="CUSTOMER";
run;


Data CS_2;
set CS_CUSTOMER CS_SUPPLIER;
run;


proc datasets library=work nolist;
delete Cs_1 Cs_customer Cs_supplier;
change Cs_2=CS;
run;



/*Sanity Check*/
proc sort data=CS nodupkey; by _all_; run;

/*Create relationship ID*/
data ID (keep=supplier_id customer_id) ;
set CS;
run;

proc sort data=ID nodupkey; by _all_; run;

data ID;
set ID;
ID=_n_;
run;


proc sql;
create table CS
as select a.*, b.*
from ID as a, CS as b
where a.supplier_id=b.supplier_id and a.customer_id=b.customer_id;
quit;
/********************************************************************************************/

data CS;
retain START_ END_ REL_TYPE ID supplier_id customer_id supplier_isin customer_isin supplier customer supplier_cusip customer_cusip;
set CS;
run;



/*Remove Duplicates and Redundant Observations*/

proc sort data=CS; by ID START_ END_; run;

/*When start date is the same as end date*/

data CS;
retain START_ END_ REL_TYPE ID supplier_id customer_id supplier_isin customer_isin supplier customer supplier_cusip customer_cusip;
do until (last.ID);
set CS;
by ID START_ END_;
retain fdate ldate;
format fdate ldate date9.;
if first.ID then do;
fdate=START_;
ldate=END_;
end;
if START_>ldate then do;
output;
fdate=START_;
ldate=END_;
end;
else ldate=max(ldate, END_);
end;
output;
run;


/*Consolidate various relationships between the same pair of firms across different time periods into
a continuous relationship if the inverval between consecutive relationships does not exceed six months
*/
data CS;
retain START_ END_ REL_TYPE ID supplier_id customer_id supplier_isin customer_isin fdate ldate supplier customer supplier_cusip customer_cusip;
do until (last.ID);
set CS (rename=(fdate=sdate ldate=edate));
by ID START_ END_;
retain fdate ldate;
format fdate ldate date9.;
if first.ID then do;
fdate=sdate;
ldate=edate;
end;
if intck('month', ldate, sdate, 'c')>6 then do;
output;
fdate=sdate;
ldate=edate;
end;
else ldate=max(ldate, edate);
end;
output;
drop START_ END_ sdate edate;
run;


data lib.CS(rename=(id=pair_id));
set CS;
run;
